library(SpatialExperiment)
## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Warning: package 'GenomeInfoDb' was built under R version 4.3.3
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
library(spatialLIBD)
library(ggplot2)

spe_weighted <- readRDS("reps_high_range_sigma.sq_high_beta_1000_genes_SVG_percentages_subset/30_percent_nonSVGs/spe_weighted_nnSVG_2.rds")
spe_unweighted <- readRDS("reps_high_range_sigma.sq_high_beta_1000_genes_SVG_percentages_subset/30_percent_nonSVGs/spe_nnSVG_2.rds")

adj_p_values_weighted <- p.adjust(1 - pchisq(rowData(spe_weighted)$weighted_LR_stat, df=2), method = "BH")
rowData(spe_weighted)$weighted_padj <- adj_p_values_weighted
# guiding question: understanding the difference between the weighted and unweighted simulations. why is the weighted simulation more conservative and lower TNR? why does the unweighted simulation capture the SVGs perfectly?
# make a 2x2 table of SVGs with high and low mean and high and low sigma.sq

# find mean of ground_truth_sigma.sq for nonzero values
nonzero_mean <- mean(rowData(spe_unweighted)$ground_truth_sigma.sq[rowData(spe_unweighted)$ground_truth_sigma.sq > 0], na.rm = TRUE)

# find mean of mean  
mean_mean <- mean(rowData(spe_unweighted)$mean, na.rm = TRUE)

svg_high_mean_high_sigma_sq <- sum(rowData(spe_unweighted)$ground_truth_sigma.sq > 0 & rowData(spe_unweighted)$ground_truth_sigma.sq > nonzero_mean & rowData(spe_unweighted)$mean > mean_mean, na.rm = TRUE)
svg_high_mean_low_sigma_sq <- sum(rowData(spe_unweighted)$ground_truth_sigma.sq > 0 & rowData(spe_unweighted)$ground_truth_sigma.sq < nonzero_mean & rowData(spe_unweighted)$mean > mean_mean, na.rm = TRUE)
svg_low_mean_high_sigma_sq <- sum(rowData(spe_unweighted)$ground_truth_sigma.sq > 0 & rowData(spe_unweighted)$ground_truth_sigma.sq > nonzero_mean & rowData(spe_unweighted)$mean < mean_mean, na.rm = TRUE)
svg_low_mean_low_sigma_sq <- sum(rowData(spe_unweighted)$ground_truth_sigma.sq > 0 & rowData(spe_unweighted)$ground_truth_sigma.sq < nonzero_mean & rowData(spe_unweighted)$mean < mean_mean, na.rm = TRUE)

svg_table <- matrix(c(svg_high_mean_high_sigma_sq, svg_high_mean_low_sigma_sq, svg_low_mean_high_sigma_sq, svg_low_mean_low_sigma_sq), nrow = 2, byrow = TRUE)

svg_table
##      [,1] [,2]
## [1,]  194  144
## [2,]  162  200
# are the weighted and unweighted simulations capturing the same SVGs?
tp_unweighted <- which(rowData(spe_unweighted)$ground_truth_sigma.sq != 0 & rowData(spe_unweighted)$padj <= 0.05)
tp_weighted <- which(rowData(spe_weighted)$ground_truth_sigma.sq != 0 & rowData(spe_weighted)$weighted_padj <= 0.05)

sum(tp_unweighted == tp_weighted)
## [1] 700
length(tp_unweighted)
## [1] 700
length(tp_weighted)
## [1] 700
# yes, they are capturing the all the SVGs and the same SVGs
# are the weighted and unweighted simulations capturing the same non-SVGs?
tn_unweighted <- rowData(spe_unweighted)$ground_truth_sigma.sq == 0 & rowData(spe_unweighted)$padj > 0.05
tn_weighted <- rowData(spe_weighted)$ground_truth_sigma.sq == 0 & rowData(spe_weighted)$weighted_padj > 0.05
    
sum(tn_unweighted == tn_weighted)
## [1] 961
sum(tn_unweighted)
## [1] 290
sum(tn_weighted)
## [1] 253
#no, the weighted simulation is capturing fewer true negatives
# dig more into the differences in true negatives between the weighted and unweighted simulations

# find the gene indices of the true negatives that are not getting captured by the weighted simulation
tn_unweighted_not_weighted <- which(tn_unweighted & !tn_weighted)
length(tn_unweighted_not_weighted) # = 38
## [1] 38
# find the gene indices of the true negatives that are not getting captured by the unweighted simulation
tn_weighted_not_unweighted <- which(tn_weighted & !tn_unweighted)
length(tn_weighted_not_unweighted) # = 1
## [1] 1
# find the gene indices of the true negatives that are getting captured by both simulations
tn_both <- which(tn_weighted & tn_unweighted)
length(tn_both) # = 252
## [1] 252
# find the dist of the mean of the true negatives that are not getting captured by the weighted simulation
fivenum(rowData(spe_unweighted)$mean[tn_unweighted_not_weighted], na.rm = TRUE)
## [1] 0.5428591 1.0027867 1.9281350 2.4914284 2.9097387
# find the dist of the mean of the true negatives that are not getting captured by the unweighted simulation
fivenum(rowData(spe_unweighted)$mean[tn_weighted_not_unweighted], na.rm = TRUE)
## [1] 2.318769 2.318769 2.318769 2.318769 2.318769
# find the dist of the mean of the true negatives that are getting captured by both simulations
fivenum(rowData(spe_unweighted)$mean[tn_both], na.rm = TRUE)
## [1] 0.4299314 0.7665668 1.2582967 1.9123613 2.9213226
# means are a bit higher for the true negatives that are not getting captured by the weighted simulation
# find the distribution of mean weights for the true negatives that are not getting captured by the weighted simulation
# weights is spots x genes
weights <- readRDS("reps_high_range_sigma.sq_high_beta_1000_genes_SVG_percentages_subset/30_percent_nonSVGs/weights_2.rds")

fivenum(weights[,tn_unweighted_not_weighted])
## [1]  1.624791  2.229350  4.965491  7.205001 34.948928
fivenum(weights[,tn_weighted_not_unweighted])
## [1]  4.621942  5.471900  5.742093  6.191111 34.948928
fivenum(weights[,tn_both])
## [1]  1.624791  2.016317  2.441631  4.477998 34.948928
fivenum(colMeans(weights[,tn_unweighted_not_weighted]))
## [1]  1.885011  2.243264  4.953570  8.231561 16.867777
fivenum(weights[,tn_weighted_not_unweighted]) # there is only 1 gene
## [1]  4.621942  5.471900  5.742093  6.191111 34.948928
fivenum(colMeans(weights[,tn_both]))
## [1]  1.675474  2.000548  2.470684  4.601287 14.296343
# weights are a bit higher for the true negatives that are not getting captured by the weighted simulation
# create some spot plots to visualize the differences 
for (ix in tn_weighted_not_unweighted) {
 df <- as.data.frame(cbind(spatialCoords(spe_unweighted), expr = logcounts(spe_unweighted)[ix, ]))
 print(ggplot(df, aes(x = pxl_col_in_fullres, y = pxl_row_in_fullres, 
                color = expr)) + 
   geom_point(size = 2) + 
   coord_fixed() + 
   scale_y_reverse() + 
   scale_color_viridis_c(name = "logcounts") + 
   ggtitle("") + 
   theme_bw() + 
   theme(plot.title = element_text(face = "italic"), 
         panel.grid = element_blank(), 
         axis.title = element_blank(), 
         axis.text = element_blank(), 
         axis.ticks = element_blank())
 )
}

# create some spot plots to visualize the differences 
for (ix in tn_unweighted_not_weighted) {
 df <- as.data.frame(cbind(spatialCoords(spe_unweighted), expr = logcounts(spe_unweighted)[ix, ]))
 print(ggplot(df, aes(x = pxl_col_in_fullres, y = pxl_row_in_fullres, 
                color = expr)) + 
   geom_point(size = 2) + 
   coord_fixed() + 
   scale_y_reverse() + 
   scale_color_viridis_c(name = "logcounts") + 
   ggtitle("") + 
   theme_bw() + 
   theme(plot.title = element_text(face = "italic"), 
         panel.grid = element_blank(), 
         axis.title = element_blank(), 
         axis.text = element_blank(), 
         axis.ticks = element_blank())
 )
}

# create some spot plots to visualize the differences 
# sample 20 genes that are getting captured by both simulations
set.seed(1)
ixs <- sample(tn_both, 20)

for (ix in ixs) {
 df <- as.data.frame(cbind(spatialCoords(spe_unweighted), expr = logcounts(spe_unweighted)[ix, ]))
 print(ggplot(df, aes(x = pxl_col_in_fullres, y = pxl_row_in_fullres, 
                color = expr)) + 
   geom_point(size = 2) + 
   coord_fixed() + 
   scale_y_reverse() + 
   scale_color_viridis_c(name = "logcounts") + 
   ggtitle("") + 
   theme_bw() + 
   theme(plot.title = element_text(face = "italic"), 
         panel.grid = element_blank(), 
         axis.title = element_blank(), 
         axis.text = element_blank(), 
         axis.ticks = element_blank())
 )
}

# create some spot plots to visualize the differences 
# sample 20 genes that are true positives
# all true positives are getting captured by both simulations
set.seed(1)
ixs <- sample(tp_unweighted, 20)

for (ix in ixs) {
 df <- as.data.frame(cbind(spatialCoords(spe_unweighted), expr = logcounts(spe_unweighted)[ix, ]))
 print(ggplot(df, aes(x = pxl_col_in_fullres, y = pxl_row_in_fullres, 
                color = expr)) + 
   geom_point(size = 2) + 
   coord_fixed() + 
   scale_y_reverse() + 
   scale_color_viridis_c(name = "logcounts") + 
   ggtitle("") + 
   theme_bw() + 
   theme(plot.title = element_text(face = "italic"), 
         panel.grid = element_blank(), 
         axis.title = element_blank(), 
         axis.text = element_blank(), 
         axis.ticks = element_blank())
 )
}